*** Session setup: no paging of output, empty workspace
set more off
clear all

*** Root folder for all inputs and outputs (relative to the working directory)
global DIR Dropbox\INE\data\bhh

*** Close any log left open by an earlier run, then start a fresh text log
capture log close
log using $DIR\cr_sample, text replace
*** Execute the fillin do-file without echoing it
*** NOTE(review): presumably this defines the fillin command used on capstk below -- confirm
run $DIR\fillin

*** Load the input data and keep only years up to 2005 (later years are non-ENIA obs);
*** observations with a missing year are removed as well
use $DIR\enia_inno_ip_ex, clear
keep if year<=2005

*********************************************************************************
*** Basic recoding
*** Order the panel by firm and year and record the earliest year observed per firm
sort id year
by id: egen minyear = min(year)
label variable minyear "First year of data for this firm"

*** Fold the small industry codes into their neighbours: 23 into 24, 30 into 31
recode sic2d (23=24) (30=31)

********************************************************************************
*** Do IP variables before deleting pre-1995 observations

*** Year-level indicators: 1 when napp_* is positive for that IP type in that year
*** NOTE(review): a missing napp_* compares as greater than 0 in Stata, so it is
*** coded as 1 here -- confirm that napp_* is never missing in the input file.
sort id year
foreach x in pat tm um des {
  gen d`x' = napp_`x'>0
}
gen dip = napp_pat>0 | napp_tm>0 | napp_um>0 | napp_des>0
gen doth = napp_um>0 | napp_des>0

*** Firm-level indicators: 1 in every year of a firm that has the yearly indicator
*** switched on at least once. The scratch total xx is dropped by its full name;
*** the earlier drop of the abbreviation x relied on variable-name abbreviation and
*** would fail or drop the wrong variable if another name starting with x exists.
foreach x in ip pat tm des um oth {
  egen xx = sum(d`x'),by(id)
  gen `x' = xx>0
  drop xx
}

label var dip "Used IP this year"
label var dpat "Used patents this year"
label var dtm "Used trademarks this year"
label var ddes "Used design rights this year"
label var dum "Used utility model this year"
label var doth "Used utility model or designs this year"
label var ip "Ever used any IP"
label var pat "Ever used patents"
label var tm "Ever used trademarks"
label var um "Ever used utility models"
label var des "Ever used design rights"
label var oth "Ever used utility models or designs"

*** For each IP type: first year of use, entrant/incumbent flags, years since first use
foreach var of varlist ip tm pat oth {
  tab year d`var'
  *** x holds the year on every row where use switches on: either the first row of a
  *** firm that already uses the IP type, or a row with use that follows a row of the
  *** same firm without use. The earliest such year per firm is the first year of use.
  gen x = year if d`var' & (id~=id[_n-1] | ~d`var'[_n-1])
  egen firstyr = min(x),by(id)
  drop x

  *** define ip samples
  *** entrant flag: only the row of the first year, when that is not the first year of data
  gen `var'first = !missing(firstyr) & firstyr~=minyear & year==firstyr
  label var `var'first "Have begun using IP in this year"
  *** incumbent flag: use already present in the first year of data, set on all rows
  gen `var'old = !missing(firstyr) & firstyr==minyear
  label var `var'old "Old IP user, all years"
  *** entrant flag carried forward from the first year of use onwards
  gen `var'new = !missing(firstyr) & firstyr~=minyear & year>=firstyr
  label var `var'new "New IP user, first year and later"
  *** years elapsed since first use; left missing before first use and for never-users
  gen lag_`var' = year-firstyr if year>=firstyr
  label var lag_`var' "Years since first IP use, missing if not"
  *** keep the first year under a type-specific name, with 0 standing for never used
  rename firstyr firstyr_`var'
  label var firstyr_`var' "D(first year firm used `var')"
  replace firstyr_`var' = 0 if firstyr_`var'==.
}

************************************************************************************************
*** Recoding ENIA variables
*** Keep ENIA observations only: revenue must be non-missing and employment nonzero
keep if totrev~=. & employ~=0

*** Divide every listed variable by 1000, i.e. thousands to million pesos
*** (capital stock is already converted). employ is in the list as well, so it is
*** measured in 1000s of employees from here on.
foreach v of varlist totrev materials payroll valadd cap_exp cap_sale tdeprec exports elecomp employ {
    replace `v' = `v'/1000
}

*** Location, ownership and legal-form dummies
gen santiago = (region==13)
gen foreign = (ownership==2)
gen mixed = (ownership==3)
gen public = (ownership==4 | legalorg==7)
*** individual/partnership legal forms, excluding foreign and mixed ownership
gen indiv = inlist(legalorg,1,3) & !(foreign | mixed)
label var santiago "Location in Santiago metro area"
label var foreign "Foreign ownership"
label var mixed "Foreign & domestic ownership"
label var public "Public firm"
label var indiv "Individual or partnership"

*** Log levels; .001 is added where a zero value should not turn into a missing log
gen logs = ln(totrev)
label var logs "Log(revenue in million pesos)"
gen logm = ln(materials+.001)
label var logm "Log(materials purchase in million pesos)"
gen loge = ln(employ+.001)
label var loge "Log(number of employees in 1000s)"
gen logpr = ln(payroll+.001)
label var logpr "Log(payroll expense in million pesos)"
gen logy = ln(valadd)
label var logy "Log(value added in million pesos)"

*** Investment
sort id year
*** Some investment, depreciation, and capital sales values are clearly too large,
*** probably in thousands rather than millions: whenever a flow exceeds 10 times the
*** capital stock it is divided by 1000 again. Missing ratios are left untouched.
gen ratio = tdeprec/capstk
replace tdeprec = tdeprec/1000 if ratio>10 & ratio~=.
*** gross investment is a copy of capital expenditure, so cap_exp itself stays as reported
gen invest = cap_exp
replace ratio = cap_sale/capstk
replace cap_sale = cap_sale/1000 if ratio>10 & ratio~=.
*** Can't sell more capital than you have. 
replace cap_sale = min(cap_sale,capstk)
replace ratio = invest/capstk
replace invest = invest/1000 if ratio>10 & ratio~=.
drop ratio
*** Net investment uses the unit-corrected gross investment, in line with the corrected
*** cap_sale and tdeprec; using raw cap_exp would bring back the values fixed just above.
gen net_invest = invest-cap_sale-tdeprec
label var invest "Gross investment in capital/fixed assets"
label var net_invest "Net investment in capital/fixed assets"
gen logi = log(invest+.001) 
label var logi "Log(investment in millions pesos)"

*** Capital stock
*** Non-positive or flagged (cap_error) capital stock is set to missing; the two counts
*** report how many observations with revenue lack capital before and after fillin.
replace capstk = . if capstk<=0 | cap_error
count if capstk==. & totrev~=.
*** NOTE(review): fillin is presumably the helper loaded by "run $DIR\fillin" at the top
*** of this file and is expected to fill in missing capstk within firm -- confirm,
*** including whether it leaves the data sorted by id year for the [_n-1] steps below.
fillin capstk id year
count if capstk==. & totrev~=.
*** Beginning-of-year (BOY) capital is the capital stock on the previous row of the same
*** firm; the previous row is used whatever the number of years between the two rows.
gen loga = log(capstk[_n-1] + .001) if id==id[_n-1]
*** Fill in BOY log capital for first observation; if net investment is very negative, 
*** just use current level of capital stock.
*** (the second replace applies when capstk - net_invest has no valid log)
replace loga = log(capstk - net_invest) if id~=id[_n-1]
replace loga = log(capstk + 0.001) if loga==. & id~=id[_n-1]
label var loga "Log(BOY capital sotck in million pesos)"

*** Log electricity and log revenue per unit of electricity
gen logelec = log(elecomp+.001)
gen logselec = logs - logelec
*** Do it again in case of filled in observations on capstk
*** (investment above 10 times the capital stock is divided by 1000 once more)
gen invratio = invest/capstk
replace invest = invest/1000 if invratio>10 & invratio~=.
replace invratio = invest/capstk
*** Per-employee log ratios: revenue, materials, BOY capital and value added
gen logse = logs-loge
gen logme = logm-loge
gen logae = loga-loge
gen logye = logy-loge

*** Remove observations that cannot be used: flagged capital stock, or missing
*** employment or BOY log capital
drop if cap_error | employ==. | loga==.

*** Trim implausible values. Missing compares as larger than any number in Stata, so
*** rows with missing invratio, logselec or logse are removed by these conditions too.
drop if invratio>10 | !inrange(logselec,-1,13) | logse>15 | logae<-5

*** Export share in sales, capped at 1. The cap is restricted to non-missing shares:
*** missing compares as greater than 1 in Stata, so an unguarded replace would turn
*** every missing export share into 1.
gen expshare = exports/totrev
replace expshare = 1 if expshare>1 & expshare<.
label var expshare "Export share in sales"
*** NOTE(review): dexp is 1 when exports is missing, for the same reason -- confirm
*** that exports is never missing in the sample.
gen dexp = exports>0
label var dexp "D (exporting firm)"
*** NOTE(review): employ was divided by 1000 in the unit conversion earlier in this file
*** while emp_skill_m and emp_skill_f are not in that list -- confirm the units agree,
*** otherwise skshare is off by a factor of 1000. hiskill is also 1 when skshare is missing.
gen skshare = (emp_skill_m + emp_skill_f)/employ
gen hiskill = skshare>0.2
label var skshare "Skilled employment share"
label var hiskill "D (high-skilled employment>20%)"

*** Recode sic2d to fold small industries into larger ones
*** Each firm gets one time-invariant code: the integer part of its mean sic2d.
*** NOTE(review): for a firm whose sic2d changes over time the truncated mean can be a
*** code the firm never reported -- confirm this is acceptable.
egen x = mean(sic2d),by(id)
gen ind = int(x)
drop x
tab ind
replace ind = 15 if ind==16
replace ind = 24 if ind==23
replace ind = 31 if ind==30 | ind==32
*** Rebuild the value label from scratch; capture keeps the run going when ilbl is not
*** defined in the data in memory (a bare label drop would stop with an error).
capture label drop ilbl

label def ilbl 15 "food products and beverages, tobacco" 
label def ilbl 17 "textiles", add
label def ilbl 18 "wearing apparel; dressing and dyeing of fur", add
label def ilbl 19 "leather preparation & goods", add
label def ilbl 20 "wood, cork and straw products, ex furniture", add
label def ilbl 21 "paper and paper products", add
label def ilbl 22 "publishing, printing and reproduction of recorded media", add
label def ilbl 24 "chemicals and chemical products incl coke & refined oil", add
label def ilbl 25 "rubber and plastics products", add
label def ilbl 26 "other non-metallic mineral products", add
label def ilbl 27 "basic metals", add
label def ilbl 28 "fabricated metal products, except machinery and equipment", add
label def ilbl 29 "machinery and equipment n.e.c.", add
label def ilbl 31 "electrical & computing machinery, comm. equipment", add
label def ilbl 33 "medical, precision and optical instruments, watches and clocks", add
label def ilbl 34 "motor vehicles, trailers and semi-trailers", add
label def ilbl 35 "other transport equipment", add
label def ilbl 36 "furniture; manufacturing n.e.c.", add
label def ilbl 37 "recycling", add
label val ind ilbl
label var ind "Reduced 2-digit ISIC classes"

*** Compute HHI for 4-digit industry
*** HHI = 10000 times the sum of squared revenue shares within each isic3-year cell
gen sales = totrev
label var sales "Revenue in millions of pesos"
egen sumsales = total(sales), by(isic3 year)
gen sqshare = sales*sales/(sumsales*sumsales)
egen hhi = total(sqshare), by(isic3 year)
drop sqshare
replace hhi = 10000*hhi
label var hhi "Herfindahl for firm's 4-digit industry"
*** the isic3-year ordering is needed by the Table B-5 filters further down
sort isic3 year
gen hhi2500 = hhi>2500
label var hhi2500 "D(HHI for firm's 4-digit industry gt than 2500)"

*** Compute the share of foreign firms (weighted by sales) in the 4-digit industry
gen fsales = foreign*sales
egen sumfsales = total(fsales), by(isic3 year)
gen shfor_sales = sumfsales/sumsales
label var shfor_sales "Share of foreign sales in firm's 4-digit industry"
drop sum*sales sales fsales

*** Table B-5
*** Only the first row of each isic3-year cell enters, so every cell counts once
gen all = 1
tabstat all hhi2500 if !(isic3==isic3[_n-1] & year==year[_n-1]), by(ind) statistics(sum)
tabstat hhi shfor_sales if !(isic3==isic3[_n-1] & year==year[_n-1]), by(ind)
drop all

*** Annualized growth in the per-employee ratios, dropping outliers in growth rates
*** Each difference is taken against the previous row of the same firm and divided by
*** the number of years between the two rows. Rows are dropped inside the loop, so the
*** next variable is differenced against whatever neighbour remains.
sort id year
foreach var of varlist logse logae logme {
  gen d`var' = (`var'-`var'[_n-1])/(year-year[_n-1]) if id==id[_n-1]
  drop if abs(d`var')>3 & d`var'~=.
}

*** Year and industry dummies for the TFP regression
tab year,gen(y_)
tab ind,gen(i_)

*** TFP is the residual of log revenue on log employment, capital and materials
*** NOTE(review): the dummy ranges y_1-y_10 and i_1-i_17 are hard-coded -- confirm they
*** match the number of years and industries actually present at this point.
reg logs loge loga logm y_1-y_10 i_1-i_17
predict tfp,resid

*** Annualized growth of the levels and of TFP, dropping rows with an absolute growth
*** rate above 2.3. The lag is missing on the first row of a firm, so growth is too.
foreach var of varlist loge logs logm loga tfp {
  gen `var'l = `var'[_n-1] if id==id[_n-1]
  gen d`var' = (`var'-`var'l)/(year-year[_n-1])
  sum d`var', det
  drop if abs(d`var')>2.3 & d`var'~=.
 }
	  
label var dloge "Annual growth in employment"
label var dlogs "Annual growth in revenue"
label var dlogm "Annual growth in materials"
label var dloga "Annual growth in capital"
label var dtfp "Annual change in TFP"

*** Remove the dummies and the lagged helper variables, then keep firms with at least
*** two remaining observations
drop y_* i_* log*l tfpl
egen nyear = count(year),by(id)
drop if nyear<2
label var nyear "Number of years for this firm"

*** Breaks in each firm's yearly sequence
*** first marks the first row of a firm; it is only needed for the cross-tabulation
gen first = (id != id[_n-1])
*** gap marks a row whose previous row of the same firm is not the preceding year
gen gap = (id==id[_n-1]) & (year-year[_n-1] != 1)
label var gap "D(data gap between this year and last obs)"
egen ngap = total(gap), by(id)
label var ngap "Number of gaps for this firm"
tab ngap
tab ngap first, column
drop first

*** add market share data
*** Unmatched rows of the market share file are discarded; the _merge variable created
*** by merge stays in the data.
*** NOTE(review): share4, labelled below, is presumably supplied by this file -- confirm
merge 1:1 id year using $DIR\mktshare, keep(match master)

*** Final variable labels (these replace the working labels assigned earlier)
label var dlogae "Annual growth in capital stock per employee"           
label var dlogme "Annual growth in materials per employee"           
label var dlogse "Annual growth in sales per employee"           
label var firstyr_ip "First year the firm used any IP"
label var firstyr_oth "First year the firm used other IP"               
label var firstyr_pat "First year the firm used patents"              
label var firstyr_tm "First year the firm used trademarks"            
*** hhi is computed within isic3-year cells, which this file describes as the 4-digit
*** industry, so the label says 4-digit rather than 2-digit
label var hhi "Herfindahl for the firm's 4-digit industry"                
label var loga "Log capital stock"
label var logae "Log capital stock per employee"
label var loge "Log employment"
label var logelec  "Log electricity consumption"
label var logi "Log capital expenditures"
label var logm "Log materials consumption"
label var logme "Log materials per employee"
label var logpr "Log payroll expense"
label var logs "Log sales (revenue)" 
label var logse "Log sales per employee"
label var logselec  "Log sales per electricity consumption"          
label var logy "Log value added" 
label var logye "Log value added per employee"
label var share4 "C4 for the firm's industry"
label var shfor_sales "Share of foreign sales in the firm's industry"

*** Restore the firm-year ordering and run the external cleanfile do-file
sort id year
do $DIR\cleanfile

*** Summary statistics for the log, then write the sample to disk
summarize
save $DIR\sample, replace

*** Checks written to the log: duplicate firm-year rows and a description of id
duplicates report id year
codebook id

log close



